Please follow the instructions in the README file before starting this tutorial.
This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
# Install any tutorial packages that are not already available.
# requireNamespace() only checks that a package can be loaded; it does not
# attach it. Each package is attached with library() where it is first used.
tutorial_packages <- c(
  "tidyverse", "readxl", "RSQLite", "DT", "plotly", "ggsignif", "ggdendro"
)
for (pkg in tutorial_packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
if (!require(patchwork)) install.packages("patchwork")
library(tidyverse)
We now need to load some data to plot
We can load this from most of the popular formats
From a comma-separated formatted file (csv):
read_csv('data/stats.csv', show_col_types = FALSE)read_tsv('data/stats.tsv', show_col_types = FALSE)readxl packagelibrary(readxl)
read_xlsx('data/stats.xlsx')
RSQLite package (to load from Microsoft Access you could use the RODBC package)
library(RSQLite)
conn <- dbConnect(RSQLite::SQLite(), "data/stats.db")
dbGetQuery(conn, "SELECT * FROM stats")
# Close the connection once the query has run so the database handle is not left open
dbDisconnect(conn)
stats <- read_xlsx('data/stats.xlsx')
stats variable, we can verify this by just running the variable name
stats
DT package and the datatable function
library(DT)
datatable(stats)%>% to run other filtering and renaming functions on our data, here we use the filter() function to filter the data by the reads column. Notice that the number of rows reduces from 58 to 21stats %>%
filter(reads > 1000000)reads and the coverage columns, we now only have 8 rowsstats %>%
filter(reads > 1000000, coverage > 110)mutate() function to add a new column called total by calculating the sum of the coverage and the reads columnsstats %>%
mutate(total = reads + coverage)rename() function to change the reads column name to countsstats %>%
rename(counts = reads)
ggplot() needs the name of the variable containing the data and which variables to use in the plot
ggplot(stats, aes(x=isolate, y=reads))
geom_bar() will calculate the height of the bars from the data, but if we want to specify the data for the y-axis we need to supply stat="identity"
ggplot(stats, aes(x=isolate, y=reads)) +
geom_bar(stat="identity")ggplot(stats, aes(x=isolate, y=reads)) +
geom_bar(stat="identity", colour='black', fill='blue')ggplot(stats, aes(x=isolate, y=reads)) +
geom_bar(stat="identity", colour='#ff0066', fill='#cc0000')theme_bw(), theme_minimal(), theme_classic(); more details and examples can be found at https://ggplot2.tidyverse.org/reference/ggtheme.htmltheme_bw() themeggplot(stats, aes(x=isolate, y=reads)) +
geom_bar(stat="identity", colour='black', fill='blue') +
theme_bw()theme()ggplot(stats, aes(x=isolate, y=reads)) +
geom_bar(stat="identity", colour='black', fill='blue') +
theme_bw() +
theme(
axis.text.x = element_text(angle=-90)
)vjust and hjust parametersggplot(stats, aes(x=isolate, y=reads)) +
geom_bar(stat="identity", colour='black', fill='blue') +
theme_bw() +
theme(
axis.text.x = element_text(angle=-90, hjust=1, vjust=0.5)
)labs) and add a title (ggtitle), change the font and font sizes of the axis labelsggplot(stats, aes(x=isolate, y=reads)) +
geom_bar(stat="identity", colour='black', fill='blue') +
ggtitle("Read counts") +
labs(x="Isolate name", y="Read count") + theme_bw() +
theme(
axis.text.x = element_text(angle=-90, hjust=1, vjust=0.5, size=10),
axis.title = element_text(size=15, family='Comic Sans MS')
)ggplot(stats, aes(x=isolate, y=reads)) +
geom_bar(stat="identity", colour='black', fill='blue') +
ggtitle("Read counts") +
labs(x="Isolate name", y="Read count") + theme_bw() +
theme(
axis.text.x = element_text(angle=-90, hjust=1, vjust=0.5)
) +
coord_flip()
stats data object to the ggplot() function. Very often it's easier to pipe the data into the ggplot() function using %>%. As you can see, the stats variable name is now outside the ggplot() function
stats %>%
ggplot(aes(x=isolate, y=reads)) +
geom_bar(stat="identity", colour='black', fill='blue') +
ggtitle("Read counts") +
labs(x="Isolate name", y="Read count") + theme_bw() +
theme(
axis.text.x = element_text(angle=-90, hjust=1, vjust=0.5)
)grepl() is just a pattern matching function, so filters isolates with the string SER in their namestats %>%
filter(grepl("SER",isolate)) %>%
ggplot(aes(x=isolate, y=reads)) +
geom_bar(stat="identity", colour='black', fill='blue') +
ggtitle("Read counts") +
labs(x="Isolate name", y="Read count") +
theme_bw()geom_text()stats %>%
filter(grepl("SER",isolate)) %>%
ggplot(aes(x=isolate, y=reads)) +
geom_bar(stat="identity", colour='black', fill='blue') +
ggtitle("Read counts") +
labs(x="Isolate name", y="Read count") +
geom_text(aes(label=reads), vjust=-0.3, size=3.5) +
theme_bw()reads against coveragestats %>%
ggplot(aes(x=reads, y=coverage)) +
geom_point() +
theme_bw() +
theme(
axis.text.x = element_text(angle=-90, hjust=1, vjust=0.5)
) geom_smooth()stats %>%
ggplot(aes(x=reads, y=coverage)) +
geom_point() +
geom_smooth() +
theme_bw() +
theme(
axis.text.x = element_text(angle=-90, hjust=1, vjust=0.5)
) `geom_smooth()` using method = 'loess' and formula 'y ~ x'
xlim() and/or ylim()stats %>%
ggplot(aes(x=reads, y=coverage)) +
geom_point() +
geom_smooth() +
xlim(c(0, 600000)) +
ylim(c(0, 100)) +
theme_bw() +
theme(
axis.text.x = element_text(angle=-90, hjust=1, vjust=0.5)
) `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Warning: Removed 21 rows containing non-finite values (stat_smooth).
Warning: Removed 21 rows containing missing values (geom_point).
plotly packagelibrary(plotly)
# Reads-versus-coverage scatter plot with a smoothed trend line, with the axes
# limited to reads 0-600000 and coverage 0-100. The plot is stored in
# `coverage` rather than printed.
coverage <- stats %>%
  ggplot(aes(x = reads, y = coverage)) +
  geom_point() +
  geom_smooth() +
  xlim(c(0, 600000)) +
  ylim(c(0, 100)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = -90, hjust = 1, vjust = 0.5))
ggplotly(coverage)`geom_smooth()` using method = 'loess' and formula 'y ~ x'
Warning: Removed 21 rows containing non-finite values (stat_smooth).
data/data.csv, plot a bar chart of county against popadults for the state of Ohio (OH)area against poptotalglm methodxlim() and/or ylim()?geom_linestate columnpopdensity column?theme?scale_y_continuousToothGrowthdose against len colouring by suppToothGrowth %>%
ggplot(aes(x=dose, y=len, color=supp)) +
geom_point() +
theme_bw()geom_point with geom_jitterToothGrowth %>%
ggplot(aes(x=dose, y=len, group=supp, color=supp)) +
geom_jitter(width=0.1) +
theme_bw()dose values to characters using the mutate() and as.character() functionsToothGrowth %>%
mutate(dose=as.character(dose)) %>%
ggplot(aes(x=dose, y=len, color=supp)) +
geom_boxplot(color='black') +
theme_bw()ToothGrowth %>%
mutate(dose=as.character(dose)) %>%
ggplot(aes(x=dose, y=len, color=supp)) +
geom_boxplot(color='black') +
geom_jitter(width=0.1) +
theme_bw()supp variable, by removing color="black" from geom_boxplot()ToothGrowth %>%
mutate(dose=as.character(dose)) %>%
ggplot(aes(x=dose, y=len, color=supp)) +
geom_boxplot() +
geom_jitter(width=0.1) +
theme_bw()ToothGrowth %>%
mutate(dose=as.character(dose)) %>%
ggplot(aes(x=dose, y=len, color=supp)) +
geom_boxplot(color='black') +
geom_jitter(width=0.1) +
labs(x="Vitamin C dose", y="Tooth length") +
theme_bw() +
theme(
legend.title = element_blank() # this removes the legend title
)ggsignif packagelibrary(ggsignif)
ToothGrowth %>%
mutate(dose=as.character(dose)) %>%
ggplot(aes(x=dose, y=len, color=supp)) +
geom_boxplot(color='black') +
geom_jitter(width=0.1) +
labs(x="Vitamin C dose", y="Tooth length") +
theme_bw() +
theme(
legend.title = element_blank() # this removes the legend title
) +
ylim(0,40) +
geom_signif(comparisons = list(c("0.5", "1")), map_signif_level = TRUE, textsize = 6, y_position = 30, colour="black", annotation=c('**')) +
geom_signif(comparisons = list(c("1", "2")), map_signif_level = TRUE, textsize = 6, y_position = 36, colour="black", annotation=c('*')) ToothGrowthsummary <- ToothGrowth %>%
group_by(supp, dose) %>%
summarise(mean=mean(len), sd=sd(len))`summarise()` has grouped output by 'supp'. You can override using the `.groups` argument.
summarysummary %>%
ggplot(aes(x=dose, y=mean, color=supp)) +
geom_line() +
theme_bw()summary %>%
ggplot(aes(x=dose, y=mean, color=supp)) +
geom_line() +
geom_point() +
geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd), width=.05) +
theme_bw()summary %>%
ggplot(aes(x=dose, y=mean, color=supp)) +
geom_line() +
geom_point() +
geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd), width=.05,
position=position_dodge(0.05)) +
theme_bw()
mtcars. First take a look and explore
cyl on the x-axis and mpg on the y-axis
mpg values, coloured by the number of carburetors
USArrests, first explore the data
USArrests
hclust function, this uses euclidean distances by default
dist(USArrests)) can be any kind of dissimilarity matrix, e.g. one derived from Pearson correlations
hc <- hclust(dist(USArrests), "ave") # hierarchical clustering
hc
Call:
hclust(d = dist(USArrests), method = "ave")
Cluster method : average
Distance : euclidean
Number of objects: 50
ggdendrogram() function from the ggdendro package
library(ggdendro)
p <- ggdendrogram(hc, rotate = TRUE, size = 2)
pggplotly()ggplotly(p)reads and coverage, notice that they won’t be printed when you run the codereads <- stats %>%
ggplot(aes(x=isolate, y=reads)) +
geom_bar(stat="identity", colour='black', fill='blue') +
ggtitle("Read counts") +
labs(x="Isolate name", y="Read count") + theme_bw() +
theme(
axis.text.x = element_text(angle=-90, hjust=1, vjust=0.5)
)
coverage <- stats %>%
ggplot(aes(x=isolate, y=coverage)) +
geom_bar(stat="identity", colour='black', fill='#888888') +
ggtitle("Coverage") +
labs(x="Isolate name", y="Coverage") +
theme_bw() +
theme(
axis.text.x = element_text(angle=-90, hjust=1, vjust=0.5)
)readscoveragepatchwork packagelibrary(patchwork)
reads + coveragereads / coveragereads / coverage + plot_annotation(tag_levels = 'A') + plot_layout(guides = "collect")reads plot but changing the underlying theme. We can also add a new title, note that this is overriding the ggtitle() already in the reads plot. Can you spot the differences in the themes?
# The stored `reads` plot under four built-in themes. Each ggtitle() names the
# theme used and replaces the title already set on `reads`.
p1 <- reads + theme_bw()      + ggtitle("theme_bw()")
p2 <- reads + theme_classic() + ggtitle("theme_classic()")
p3 <- reads + theme_minimal() + ggtitle("theme_minimal()")
p4 <- reads + theme_dark()    + ggtitle("theme_dark()")
( p1 + p2 ) / ( p3 + p4 ) + plot_annotation(tag_levels = 'A')Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.
sessionInfo()R version 4.1.1 (2021-08-10)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: CentOS Linux 7 (Core)
Matrix products: default
BLAS: /opt/R/4.1.1/lib64/R/lib/libRblas.so
LAPACK: /opt/R/4.1.1/lib64/R/lib/libRlapack.so
locale:
[1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C LC_TIME=en_GB.UTF-8
[4] LC_COLLATE=en_GB.UTF-8 LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
[7] LC_PAPER=en_GB.UTF-8 LC_NAME=C LC_ADDRESS=C
[10] LC_TELEPHONE=C LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] ggsignif_0.6.3 ggdendro_0.1.23 gapminder_0.3.0 gganimate_1.0.7 plotly_4.10.0 patchwork_1.1.1
[7] DT_0.23 readxl_1.3.1 forcats_0.5.1 stringr_1.4.0 dplyr_1.0.7 purrr_0.3.4
[13] readr_2.0.1 tidyr_1.1.3 tibble_3.1.4 ggplot2_3.3.5 tidyverse_1.3.1 RSQLite_2.2.14
loaded via a namespace (and not attached):
[1] nlme_3.1-152 fs_1.5.0 lubridate_1.7.10 bit64_4.0.5 progress_1.2.2
[6] httr_1.4.2 tools_4.1.1 backports_1.2.1 bslib_0.3.1 utf8_1.2.2
[11] R6_2.5.1 DBI_1.1.1 lazyeval_0.2.2 mgcv_1.8-36 colorspace_2.0-2
[16] withr_2.4.2 tidyselect_1.1.1 prettyunits_1.1.1 bit_4.0.4 compiler_4.1.1
[21] cli_3.0.1 rvest_1.0.1 xml2_1.3.2 labeling_0.4.2 sass_0.4.1
[26] scales_1.1.1 digest_0.6.27 rmarkdown_2.11 pkgconfig_2.0.3 htmltools_0.5.2
[31] dbplyr_2.1.1 fastmap_1.1.0 htmlwidgets_1.5.4 rlang_0.4.11 rstudioapi_0.13
[36] jquerylib_0.1.4 farver_2.1.0 generics_0.1.0 jsonlite_1.7.2 crosstalk_1.2.0
[41] vroom_1.5.5 magrittr_2.0.1 Matrix_1.3-4 Rcpp_1.0.7 munsell_0.5.0
[46] fansi_0.5.0 lifecycle_1.0.0 stringi_1.7.4 yaml_2.2.1 MASS_7.3-54
[51] plyr_1.8.7 grid_4.1.1 blob_1.2.2 parallel_4.1.1 crayon_1.4.1
[56] lattice_0.20-44 haven_2.4.3 splines_4.1.1 hms_1.1.0 knitr_1.34
[61] pillar_1.6.2 reprex_2.0.1 glue_1.4.2 evaluate_0.14 data.table_1.14.0
[66] modelr_0.1.8 vctrs_0.3.8 tzdb_0.1.2 tweenr_1.0.2 cellranger_1.1.0
[71] gtable_0.3.0 assertthat_0.2.1 cachem_1.0.6 xfun_0.26 broom_0.7.9
[76] viridisLite_0.4.0 memoise_2.0.1 ellipsis_0.3.2